#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2021/4/9 0009 13:05
# @Author : Moyan
# @Site : 
# @File : NcwuSchoolNotifySpiderXpath.py
# @Software: PyCharm
import requests
from lxml import etree
import csv
from multiprocessing.dummy import Pool

# Accumulates the parsed entries from every page; the script extends it from
# the per-page results and finally writes it to notify.csv.
result_list = []
# NOTE(review): not read anywhere in the visible code -- the `page` parameter
# of getHtmlParseResult shadows it inside the function.
page = 1

def getHtmlParseResult(page):
    """Download one listing page of the notice channel and parse its entries.

    Args:
        page: Page number. Values <= 1 select the unsuffixed first page
            (``.../channels/5.html``); larger values select
            ``.../channels/5_<page>.html``.

    Returns:
        A list of dicts with the string keys ``date``, ``title``, ``link``
        and ``desc``, one dict per entry found on the page. The list is
        empty (never ``None``) when the page contains no entries, so callers
        can safely take ``len()`` of the result or iterate over it.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    page_suffix = "" if page <= 1 else "_{}".format(page)
    url = "https://www5.ncwu.edu.cn/channels/5{}.html".format(page_suffix)
    print(url)
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"
    }
    # Without a timeout a single stalled connection would block its pool
    # worker (and therefore the whole run) indefinitely.
    response = requests.get(url, headers=headers, timeout=30)
    selector = etree.HTML(response.content)
    if selector is None:
        # Nothing parseable came back (e.g. an empty body).
        return []

    entries = selector.xpath('//div[@class="news-list"]/div')
    if not entries:
        return []

    item_list = []
    for item in entries:
        # The date is assembled from two sibling nodes, second one first.
        # NOTE(review): presumably year-month followed by day -- confirm
        # against the page markup.
        date = item.xpath('div[1]/div[2]/text()') + item.xpath('div[1]/div[1]/text()')
        title = item.xpath('div[2]/h2/a/text()')
        # NOTE(review): the title collects text from every <a> in the
        # heading while the link is taken from the second <a> only; kept as
        # in the original -- confirm against the page markup.
        link = item.xpath('div[2]/h2/a[2]/@href')
        desc = item.xpath('div[2]/div/div/div/text()')

        item_list.append({
            'date': "".join(date).replace("-", "").replace("\n", ""),
            'title': "".join(title).replace("\n", ""),
            'link': "".join(link).replace("\n", ""),
            # Line breaks, tabs and spaces are removed from the summary.
            'desc': "".join(desc).replace("\r\n", "").replace("\n", "")
                    .replace("\t", "").replace(" ", "").strip(),
        })
    return item_list

# Fetch all listing pages concurrently with a pool of 10 worker threads
# (multiprocessing.dummy.Pool is thread-based).
pool = Pool(10)
# Page numbers start at 1: getHtmlParseResult maps both 0 and 1 to the first
# page, so starting the range at 0 would scrape the first page twice and
# duplicate its rows in the CSV. The last page requested is still 121.
ori_num = list(range(1, 122))
try:
    myResult = pool.map(getHtmlParseResult, ori_num)
finally:
    # Release the worker threads even if a download raised.
    pool.close()
    pool.join()

for r in myResult:
    # Keep every non-empty page, including pages with exactly one entry;
    # the truthiness test also tolerates a None result for an empty page.
    if r:
        result_list.extend(r)

print(len(result_list))

# newline='' lets the csv module control line endings itself.
with open('notify.csv', 'w', encoding='utf-8', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=["date", "title", "desc", 'link'])
    writer.writeheader()
    writer.writerows(result_list)


